	\section{一元线性回归习题}
	
	\textbf{2}考虑过原点的线性回归模型:
	\begin{equation*}
		y_{i} = \beta_{1}x_{i} + \epsilon_{i}, i = 1,2,\cdots,n.
	\end{equation*}
	误差$\epsilon_{1},\epsilon_{2},\cdots,\epsilon_{n}$仍满足基本假定. 求$\beta_{1}$的最小二乘估计.
	
	\begin{proof}[\bf 解]
		进行OLS估计,即寻找$\hat{\beta}_{1}$,使得残差平方和最小化:
		\begin{equation*}
			\min\limits_{\hat{\beta}_{1}} \sum_{i=1}^{n} (y_{i}-\hat{\beta}_{1}x_{i})^{2}.
		\end{equation*}
		这是一元函数的极值问题,其一阶条件为
		\begin{equation*}
			\frac{\mathrm{d} }{\mathrm{d}\hat{\beta}_{1}}\sum_{i=1}^{n} (y_{i}-\hat{\beta}_{1}x_{i})^2=-2 \sum_{i=1}^{n}(y_{i}-\hat{\beta}_{1}x_{i})x_{i}=0.
		\end{equation*}
		消去方程两边“-2”可得
		\begin{equation*}
			 \sum_{i=1}^{n}(y_{i}-\hat{\beta}_{1}x_{i})x_{i}=0.
		\end{equation*}
		展开可得$\beta_{1}$的最小二乘估计为
		\begin{equation*}
			\hat{\beta}_{1} = \frac{\sum_{i=1}^{n}x_{i}y_{i}}{\sum_{i=1}^{n}x_{i}^2}.
		\end{equation*}
	\end{proof}

	\textbf{3}证明一元线性回归中
	\begin{equation*}
		\sum_{i=1}^{n}e_{i}=0, \sum_{i=1}^{n}x_{i}e_{i}=0.
	\end{equation*}
	\begin{proof}[\bf 解]
		对一元线性回归方程进行最小二乘估计,即寻找$\hat{\beta}_{0},\hat{\beta}_{1}$使得残差平方和最小化:
		\begin{equation*}
			\min\limits_{\hat{\beta}_{0},\hat{\beta}_{1}} \sum_{i=1}^{n} (y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})^2.
		\end{equation*}
		考虑到目标函数是关于$\hat{\beta}_{0},\hat{\beta}_{1}$的非负二次函数,因而它的最小值总是存在,其最小化的一阶条件为:
	\begin{equation*}
		\begin{cases}
			\frac{\partial}{\partial \hat{\beta}_{0}}\sum_{i=1}^{n} (y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})^2=-2\sum_{i=1}^{n}(y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})=0,\\
			\frac{\partial}{\partial \hat{\beta}_{1}}\sum_{i=1}^{n} (y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})^2=-2\sum_{i=1}^{n}(y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})x_{i}=0.
		\end{cases}
	\end{equation*}
	替换$y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i}$为残差$e_{i}$,消去“-2”,可得
	\begin{equation*}
		\begin{cases}
			\sum_{i=1}^{n}e_{i}=0,\\
			\sum_{i=1}^{n}e_{i}x_{i}=0.
		\end{cases}
	\end{equation*}
	\end{proof}

	\textbf{4}回归方程$E(y)=\beta_{0}+\beta_{1}x$的参数$\beta_{0},\beta_{1}$的最小二乘估计与极大似然估计在什么条件下等价?给出证明.
	\begin{proof}
		在假设$\epsilon_{i}\sim N(0,\sigma^2)$时,最小二乘估计与极大似然估计是等价的.在正态性假设的条件下,易得$y_{i}\sim N(\beta_{0}+\beta_{1}x_{i},\sigma^2)$,故$y_{i}$的概率密度函数为:
		\begin{equation*}
			f_{i}(y_{i})=\frac{1}{\sqrt{2\pi}\sigma}\mathrm{exp}\{-\frac{1}{2\sigma^2}[y_{i}-(\beta_{0}+\beta_{1}x_{i})]^2\}, i=1,2,\cdots,n.
		\end{equation*}
		根据极大似然估计原理,似然函数为:
		\begin{eqnarray}
			L(\beta_{0},\beta_{1},\sigma^2)&=&\prod_{i=1}^{n} \frac{1}{\sqrt{2\pi}\sigma}\mathrm{exp}\{-\frac{1}{2\sigma^2}[y_{i}-(\beta_{0}+\beta_{1}x_{i})]^2\} \nonumber\\
			&=& (2\pi\sigma^{2})^{-\frac{n}{2}}\mathrm{exp}\{-\frac{1}{2\sigma^2}\sum_{i=1}^{n}[y_{i}-(\beta_{0}+\beta_{1}x_{i})]^2\}. \nonumber
		\end{eqnarray}
		对似然函数取对数,得
		\begin{equation*}
			\ln(L)=-\frac{n}{2}\ln(2\pi\sigma^2)-\frac{1}{2\sigma^2}\sum_{i=1}^{n}[y_{i}-(\beta_{0}+\beta_{1}x_{i})]^2.
		\end{equation*}
		分别对$\beta_{0},\beta_{1}$求偏导,可得
		\begin{equation*}
			\begin{cases}
				\frac{\partial \ln(L)}{\partial \hat{\beta}_{0}}=\frac{1}{\sigma^{2}}\sum_{i=1}^{n}(y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})=0,\\
				\frac{\partial \ln(L)}{\partial \hat{\beta}_{1}}=\frac{1}{\sigma^{2}}\sum_{i=1}^{n}(y_{i}-\hat{\beta}_{0}-\hat{\beta}_{1}x_{i})x_{i}=0.
			\end{cases}
		\end{equation*}	
		此时与普通最小二乘法的目标函数仅相差常数倍,求解出的参数估计表达式相同.
	\end{proof}
	
	\textbf{5}证明$\hat{\beta}_{0},\hat{\beta}_{1}$是无偏估计.
	\begin{proof}[\bf 解]
		普通最小二乘法中$\hat{\beta}_{0},\hat{\beta}_{1}$的估计量为:
		\begin{equation*}
			\hat{\beta}_{0}=\bar{y}-\hat{\beta}_{1}\bar{x}
			,\hat{\beta}_{1} = \frac{\sum_{i=1}^{n}(x_{i}-\bar{x})(y_{i}-\bar{y})}{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}.
		\end{equation*}
		将$\hat{\beta}_{1}$的分子展开,易得
		\begin{equation*}
			\hat{\beta}_{1} = \frac{\sum_{i=1}^{n}(x_{i}-\bar{x})y_{i}}{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}=\sum_{i=1}^{n}\frac{x_{i}-\bar{x}}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}y_{i}.
		\end{equation*}
		在两边同时取期望,可得
		\begin{eqnarray}
			E(\hat{\beta}_{1})&=&\sum_{i=1}^{n}\frac{x_{i}-\bar{x}}{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}E(y_{i}) \nonumber \\
			&=&\sum_{i=1}^{n}\frac{x_{i}-\bar{x}}{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}(\beta_{0}+\beta_{1}x_{i}) \nonumber\\
			&=&0+\frac{\sum_{i=1}^{n}(x_{i}-\bar{x})x_{i}}{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}\beta_{1}\nonumber\\
			&=&\frac{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}{\sum_{i=1}^{n}(x_{i}-\bar{x})^2}\beta_{1}= \beta_{1}. \label{一元线性回归beta1无偏估计}
		\end{eqnarray}
		对$\hat{\beta}_{0}=\bar{y}-\hat{\beta}_{1}\bar{x}$两边同时取期望,可得
		\begin{equation}
			E(\hat{\beta}_{0})=E(\frac{1}{n}\sum_{i=1}^{n}(\beta_{0}+\beta_{1}x_{i}+\epsilon_{i}))-E(\hat{\beta}_{1}\bar{x}). \nonumber
		\end{equation}
		由式(\ref{一元线性回归beta1无偏估计})可得
		\begin{eqnarray}
			E(\hat{\beta}_{0})&=& \beta_{0} + \beta_{1}\bar{x} - \beta_{1}\bar{x} \nonumber\\ 
			&=& \beta_{0} \label{一元线性回归beta0无偏估计}
		\end{eqnarray}
	\end{proof}

	\textbf{6}证明$Var(\hat{\beta}_{0})=[\frac{1}{n}+\frac{\bar{x}^2}{\sum_{i=1}^{n}(x_{i}-\bar{x})^{2}}]\sigma^{2}$
	\begin{proof}
		$\beta_{0},\beta_{1}$的估计量为:
		\begin{eqnarray}
			\label{beta0估计量}
			\hat{\beta}_{0} &=& \bar{y} - \hat{\beta}_{1}\bar{x} \\
			\label{beta1估计量}
			\hat{\beta}_{1}&=&\sum_{i=1}^{n}\frac{(x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}y_{i}
		\end{eqnarray}
		将式(\ref{beta1估计量})带入式(\ref{beta0估计量})得
		\begin{equation}
			\label{beta1带入beta0}
			\hat{\beta}_{0} = \bar{y} - \sum_{i=1}^{n}\frac{\bar{x} (x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}y_{i} 
		\end{equation}
		对式(\ref{beta1带入beta0})左右两边同时取方差，由$y_{1},y_{2},\cdots,y_{n}$之间相互独立,得
		
		\begin{eqnarray}
		    Var(\hat{\beta}_{0}) &=& Var(\sum_{i=1}^{n}[\frac{1}{n} - \frac{\bar{x} (x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}]y_{i}) \nonumber\\
		    &=& \sum_{i=1}^{n}Var([\frac{1}{n} - \frac{\bar{x} (x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}]y_{i})  \nonumber\\
            &=&\sum_{i=1}^{n}[\frac{1}{n} - \frac{\bar{x} (x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}]^{2}Var(y_{i}) \nonumber \\
            &=& \sum_{i=1}^{n}[\frac{1}{n^{2}} + (\frac{\bar{x} (x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2})^{2} - \frac{2}{n}\cdot\frac{\bar{x} (x_{i}-\bar{x})}{\sum_{j=1}^{n}(x_{j}-\bar{x})^2}]\sigma^{2} \nonumber \\
            &=& [\frac{1}{n}+\frac{\bar{x}^2}{\sum_{i=1}^{n}(x_{i}-\bar{x})^{2}}]\sigma^{2} \nonumber
        \end{eqnarray}
	\end{proof}
	\textbf{7}证明平方和分解式$SST=SSR+SSE$
	\begin{proof}
		\begin{eqnarray}
			\sum_{i=1}^{n}(y_{i}-\bar{y})^2&=&\sum_{i=1}^{n}(y_{i}-\hat{y}_{i}+\hat{y}_{i}-\bar{y})^2 \nonumber \\
			&=& \sum_{i=1}^{n}(y_{i}-\hat{y}_{i})^2 + \sum_{i=1}^{n}(\hat{y}_{i}-\bar{y})^2+2\sum_{i=1}^{n}(y_{i}-\hat{y}_{i})(\hat{y}_{i}-\bar{y}) \nonumber \\
			&=& SSE + SSR + 2\sum_{i=1}^{n}(y_{i}-\hat{y}_{i})(\hat{y}_{i}-\bar{y}) \nonumber \\
			&=& SSE + SSR + 2\sum_{i=1}^{n}e_{i}(\hat{y}_{i}-\bar{y}) \nonumber
		\end{eqnarray}
		故只需证明:
		\begin{equation*}
			\sum_{i=1}^{n}e_{i}(\hat{y}_{i}-\bar{y})=0
		\end{equation*}
		即证
		\begin{equation*}
			\sum_{i=1}^{n}e_{i}(\hat{\beta}_{0}+\hat{\beta}_{1}x_{i}-\bar{y})=\hat{\beta}_{0}\sum_{i=1}^{n}e_{i} + \hat{\beta}_{1}\sum_{i=1}^{n}e_{i}x_{i}-\bar{y}\sum_{i=1}^{n}e_{i}.
		\end{equation*}
		根据问题3中的结果
		\begin{equation*}
			\sum_{i=1}^{n}e_{i}=0, \sum_{i=1}^{n}x_{i}e_{i}=0.
		\end{equation*}
		易得
		\begin{equation*}
		\hat{\beta}_{0}\sum_{i=1}^{n}e_{i} + \hat{\beta}_{1}\sum_{i=1}^{n}e_{i}x_{i}-\bar{y}\sum_{i=1}^{n}e_{i}=0.
		\end{equation*}
		故
		\begin{equation*}
			SST=SSR+SSE.
		\end{equation*}
	\end{proof}
	
	\textbf{9}验证$Var(e_{i})=[1-\frac{1}{n}-\frac{(x_{i}-\bar{x})^2}{L_{xx}}]\sigma^{2}$ 
	\begin{proof}
		\begin{eqnarray}
			Var(e_{i}) &=& Var(y_{i}-\hat{y}_{i}) \nonumber\\
			&=& Var(y_{i}) + Var(\hat{y}_{i}) - 2Cov(y_{i},\hat{y}_{i}) \nonumber \\
			&=& Var(y_{i}) + Var(\hat{\beta}_{0}+\hat{\beta}_{1}x_{i}) - 2Cov(y_{i},\hat{\beta}_{0}+\hat{\beta}_{1}x_{i})\nonumber\\
			&=& \sigma^{2} + Var(\bar{y}-\hat{\beta}_{1}\bar{x}+\hat{\beta}_{1}x_{i}) - 2Cov(y_{i},\bar{y}-\hat{\beta}_{1}\bar{x} +\hat{\beta}_{1}x_{i})  \nonumber \\
			&=& \sigma^{2} + Var(\bar{y}+\hat{\beta}_{1}(x_{i}-\bar{x})) - 2Cov(y_{i},\bar{y}+\hat{\beta}_{1}(x_{i}-\bar{x}))   \label{2-9-1}
		\end{eqnarray}
		其中
		\begin{eqnarray}
			Var(\bar{y}+\hat{\beta}_{1}(x_{i}-\bar{x})) &=& Var[\sum_{j=1}^{n}(\frac{1}{n}+\frac{(x_{j}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^2})y_{j}] \nonumber \\
			&=& \sum_{j=1}^{n}(\frac{1}{n}+\frac{(x_{j}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^2})^2Var(y_{j}) \nonumber \\
			&=& \sum_{j=1}^{n}(\frac{1}{n^{2}}+(\frac{(x_{j}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^2})^2+\frac{2}{n}\frac{(x_{j}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^2})\sigma^{2} \nonumber  \\
			&=& (\frac{1}{n} +\frac{(x_{i}-\bar{x})^2}{\sum_{k=1}^{n}(x_{k}-\bar{x})^{2}})\sigma^2 \label{2-9-2}
		\end{eqnarray}
		在倒数第二个等式中应用等式$\sum_{j=1}^{n}(x_{j}-\bar{x})=0$,易得$\frac{2}{n}\sum_{j=1}^{n}\frac{(x_{j}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^2}=0$.
		\begin{equation*}
			Cov(y_{i},\bar{y}+\hat{\beta}_{1}(x_{i}-\bar{x}))=Cov(y_{i},\frac{1}{n}\sum_{j=1}^{n}y_{j}) + Cov(y_{i},\sum_{j=1}^{n}\frac{(x_{j}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^{2}}y_{j})
		\end{equation*}
		根据$y_{i}$与$y_{j},i\neq j$之间相互独立 
		\begin{eqnarray}
			Cov(y_{i},\bar{y}+\hat{\beta}_{1}(x_{i}-\bar{x})) &=& \frac{1}{n}\sigma^2 + \frac{(x_{i}-\bar{x})(x_{i}-\bar{x})}{\sum_{k=1}^{n}(x_{k}-\bar{x})^{2}}\sigma^2 \nonumber \\ \label{2-9-3}
			&=& \frac{1}{n}\sigma^2 + \frac{(x_{i}-\bar{x})^2}{\sum_{k=1}^{n}(x_{k}-\bar{x})^{2}}\sigma^2.
		\end{eqnarray}
		将式(\ref{2-9-2})与(\ref{2-9-3})代入(\ref{2-9-1})可得
		\begin{equation*}
			Var(e_{i})=[1-\frac{1}{n}-\frac{(x_{i}-\bar{x})^2}{L_{xx}}]\sigma^{2}.
		\end{equation*}
	\end{proof}
	\textbf{10}用\textbf{9}题证明$\hat{\sigma}^2=\frac{1}{n-2}\sum_{i=1}^{n}(y_{i}-\hat{y}_{i})^2$是$\sigma^2$的无偏估计. 
	\begin{proof}
		\begin{eqnarray}
			E(\hat{\sigma}^2)&=&\frac{1}{n-2}\sum_{i=1}^{n}E((y_{i}-\hat{y}_{i})^2) \nonumber \\
			&=& \frac{1}{n-2}\sum_{i=1}^{n}E(e_{i}^{2}) \nonumber\\
			&=& \frac{1}{n-2}\sum_{i=1}^{n}Var(e_{i}) \nonumber \\
			&=& \frac{1}{n-2}\sum_{i=1}^{n}[1-\frac{1}{n}-\frac{(x_{i}-\bar{x})^2}{L_{xx}}]\sigma^{2} \nonumber \\
			&=& \frac{1}{n-2}(n-2)\sigma^2=\sigma^2 \nonumber
		\end{eqnarray}

	\end{proof}
	\textbf{14}为了调查某广告对销售收入的影响,某商店记录了五个月的销售收入$y$(万元)和广告费用$x$(万元),数据如表所示
	\begin{table}[H]
		\centering
		\begin{tabular}{cccccc}
			\toprule
			\toprule
			月份      & 1 & 2 & 3 & 4 & 5  \\ 
			\midrule
			x        & 1 & 2 & 3 & 4 & 5  \\ 
			y        & 10& 10& 20& 20& 40 \\ 
			\bottomrule
		\end{tabular}
	\caption{2-14回归数据}
	\end{table}
	(1) 绘制散点图.
	
	根据回归数据绘制散点图:
	\begin{figure}[htbp]
		\centering
		\includegraphics[scale=1]{Graph//scatter_2_14.png}
		\caption{广告费用与销售收入散点图}
		\label{scatter_2_14}
	\end{figure} 
	
	绘图代码：
	\begin{lstlisting}[language=R]
		> library(ggplot2)
		> x <- c(1,2,3,4,5)
		> y <- c(10,10,20,20,40)
		> p <-ggplot(data_df,aes(x=x,y=y))+geom_point(colour='#990000')+
		     xlab("广告费用(万元)")+
		     ylab("销售收入(万元)")
		
		> ggsave("scatter_2_14.png",p,width = 5, height = 3)
	\end{lstlisting}

	(2) x与y是否大致呈线性关系？
	
	根据图\ref{scatter_2_14} $x$与$y$之间大致呈线性关系.
	
	(3) 用最小二乘估计求出回归方程.
	
	利用R语言自带的线性模型函数进行估计,R代码如下:
	
	\begin{lstlisting}[language=R]
		> model = lm(y~x) #一元线性回归模型(包含截距项)
		> summary(model)
		Call:
		lm(formula = y ~ x)
		
		Residuals:
		1          2          3          4          5 
		4.000e+00 -3.000e+00 -3.775e-15 -7.000e+00  6.000e+00 
		
		Coefficients:
					   Estimate Std. Error t value Pr(>|t|)  
		(Intercept)   -1.000      6.351  -0.157   0.8849  
		x              7.000      1.915   3.656   0.0354 *
		---
		Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
		
		Residual standard error: 6.055 on 3 degrees of freedom
		Multiple R-squared:  0.8167,	Adjusted R-squared:  0.7556 
		F-statistic: 13.36 on 1 and 3 DF,  p-value: 0.03535
	\end{lstlisting}
	其估计的方程为：
	\begin{equation*}
		y=7x-1
	\end{equation*}
	
	(4) 求回归标准误差$\hat{\sigma}$.
	
	根据(3)中的结果可知回归标准误差$\hat{\sigma}=6.055$
	
	(5) 给出$\hat{\beta}_{0}$与$\hat{\beta}_{1}$的置信度为$95\%$的置信区间
	
	使用命令与结果如下
	\begin{lstlisting}[language=R]
		> confint(model)
		                  2.5 %   97.5 %
		(Intercept) -21.2112485 19.21125
		x             0.9060793 13.09392
	\end{lstlisting}
	
	(6) 计算$x$与$y$的决定系数.
	
	根据(3)中的结果可知决定系数$R^2=0.8167$
	
	(7) 对回归方程做方差分析.
	
	使用如下命令进行回归方程的方差分析：
	\begin{lstlisting}[language=R]
		> anova(model)
		Analysis of Variance Table
		
		Response: y
		           Df Sum Sq Mean Sq F value  Pr(>F)  
		x          1    490  490.00  13.364 0.03535 *
		Residuals  3    110   36.67                  
		---
		Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	\end{lstlisting}

	其结果显示,F统计量的观测值为13.364,对应的概率$P$值为$0.035<0.05$,在显著性水平为$5\%$的水平拒绝原假设,认为回归方程显著,即回归系数不全为0.
	
 	(8) 做回归系数$\beta_{1}$的显著性检验
 	
 	根据(3)中的结果$t$统计量的观测值为3.656,对应的概率$P$值为$0.0354$在显著性水平为$5\%$的水平拒绝原假设,认为回归系数$\beta_{1}$不为0.
 	
 	(9) 做相关系数的显著性检验
 	
 	使用如下命令进行相关系数的显著性检验:
 	\begin{lstlisting}[language=R]
 		> cor.test(x,y,method='pearson')
 	\end{lstlisting}
 	\begin{lstlisting}
 			Pearson's product-moment correlation
 		
 		data:  x and y
 		t = 3.6556, df = 3, p-value = 0.03535
 		alternative hypothesis: true correlation is not equal to 0
 		95 percent confidence interval:
 		0.1057216 0.9936915
 		sample estimates:
 		cor 
 		0.9036961 
 	\end{lstlisting}
 	
	根据结果显示,样本相关系数为0.903,相关系数检验的$t$统计量的观测值为$3.6556$,对应的概率$P$值为$0.03535$,在显著性水平为$5\%$的条件下,拒绝相关系数为0的原假设,认为$x$与$y$之间存在显著的线性相关关系.
	
	(10) 对回归方程作残差图并作相应分析.
	
	\begin{figure}[htbp]

		\centering
		\includegraphics{Graph//resplot_2_14.png}
		\caption{残差与$x$散点图}
		\label{resplot_2_14}
	\end{figure}

	根据图\ref{resplot_2_14}残差在$e=0$附近随机变化,可认为回归模型满足基本假定. 
	
	(11) 求当广告费为4.2万元时,销售收入将达到多少,并给出置信度95\%的置信区间
	
	使用如下R代码：
	\begin{lstlisting}[language=R]
	> predict.lm(model, newdata=data.frame(x=4.2), interval = 'confidence',level=0.95)
	  fit      lwr      upr
	1 28.4 17.09746 39.70254	
	\end{lstlisting}

	得到$x=4.2$时$y$的估计值为28.4万元,置信区间为$[17.09746,39.70254]$
	
	\textbf{15} 一家保险公司十分关心其总公司营业部加班的程度,决定认真调查一下现状.经过10周的调查,收集了每周加班时间的数据和签发的新保单的数目,$x$为每周签发的新保单的数目,$y$为每周加班时间(小时),数据如表所示：
		\begin{table}[H]
		\centering
		\begin{tabular}{ccccccccccc}
			\toprule
			\toprule
			月份      &   1   &   2   &  3   &   4   &   5   &   6   &  7   &  8  &  9  & 10    \\ 
			\midrule
			x        &  825  &  215  & 1070 &  550  &  480  &  920  & 1350 & 325 & 670 & 1215  \\ 
			y        & 3.5   &   1   &   4  &   2   &   1   &   3   &  4.5 & 1.5 &  3  &  5    \\ 
			\bottomrule
		\end{tabular}
		\caption{2-15回归数据}
	\end{table}
	
	(1) 画散点图
	
	绘制散点图代码如下:
	\begin{lstlisting}[language=R]
		> x <- c(825, 215, 1070, 550, 480, 920, 1350, 325, 670, 1215)
		> y <- c(3.5, 1, 4, 2, 1, 3, 4.5, 1.5, 3, 5)
		> data_df <- data.frame(x=x,y=y)
		> p <-ggplot(data_df,aes(x=x,y=y))+geom_point(colour='#990000')+
		  xlab("每周签发新保单数量")+
		  ylab("加班时间(小时)")
		> ggsave("scatter_2_15.png",p,width = 5, height = 3)
	\end{lstlisting}
	\begin{figure}[htbp]
	
	\centering
	\includegraphics{Graph//scatter_2_15.png}
	\caption{每周签发新保单数量与加班时间(小时)散点图}
	\label{scatter_2_15}
	\end{figure}
	
	(2) $x,y$是否大致呈线性关系？
	
	根据图\ref{scatter_2_15},$x,y$呈线性关系.
	
	(3) 用最小二乘法估计方程.
	
		\begin{lstlisting}[language=R]
	> model = lm(y~x) #一元线性回归模型(包含截距项)
	> summary(model)
	Call:
	lm(formula = y ~ x)
	
	Residuals:
	Min       1Q   Median       3Q      Max 
	-0.83899 -0.33483  0.07842  0.37228  0.52594 
	
	Coefficients:
	             Estimate  Std. Error t value Pr(>|t|)    
	(Intercept) 0.1181291  0.3551477   0.333    0.748    
	x           0.0035851  0.0004214   8.509 2.79e-05 ***
	---
	Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	
	Residual standard error: 0.48 on 8 degrees of freedom
	Multiple R-squared:  0.9005,	Adjusted R-squared:  0.8881 
			F-statistic:  72.4 on 1 and 8 DF,  p-value: 2.795e-05

		\end{lstlisting}
	
	根据结果,对应的回归方程为:
	\begin{equation*}
		y = 0.0035851x+0.1181291
	\end{equation*}
	(4) 求回归标准误$\hat{\sigma}$
		
	根据(3)中结果$\hat{\sigma}=0.48$
	
	(5) 给出$\hat{\beta}_{0},\hat{\beta}_{1}$置信度为的95\%的区间估计.
	
	\begin{lstlisting}[language=R]
	> confint(model)
	                   2.5 %      97.5 %
	(Intercept) -0.700843004 0.937101152
	x            0.002613486 0.004556779
	\end{lstlisting}

	其$\hat{\beta}_{0},\hat{\beta}_{1}$的置信区间分别为:$[-0.700843004,0.937101152],[0.002613486,0.004556779]$
	
	(6)计算$x,y$的决定系数.
	
	根据(3)中结果$x,y$的决定系数$R^2=0.9005$
	
	(7) 作回归系数的$\hat{\beta}_{1}$的显著性检验
	
	根据(3)中结果,$t$统计量的观测值为$8.509$,对应的概率P值为$2.79e-05<0.01$,在显著性水平为1\%的条件下拒绝回归系数为0的原假设,认为其系数显著.
	
	(8) 对回归方程作方差分析.
	
	\begin{lstlisting}[language=R]
	> anova(model)
	Analysis of Variance Table
	
	Response: y
	           Df  Sum Sq Mean Sq F value    Pr(>F)    
	x          1 16.6816 16.6816  72.396 2.795e-05 ***
	Residuals  8  1.8434  0.2304                      
	---
	Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	\end{lstlisting}
	其结果显示,F统计量的观测值为72.396,对应的概率$P$值为$2.795e-05<0.01$,在显著性水平为$1\%$的水平拒绝原假设,认为回归方程显著,即回归系数不全为0.
	
	(9) 做相关系数的显著性检验.
	
	\begin{lstlisting}[language=R]
	> cor.test(x,y,method = 'pearson')
	\end{lstlisting}

 	\begin{lstlisting}
	Pearson's product-moment correlation
	
	data:  x and y
	t = 8.5086, df = 8, p-value = 2.795e-05
	alternative hypothesis: true correlation is not equal to 0
	95 percent confidence interval:
	0.7932921 0.9881624
	sample estimates:
	cor 
	0.9489428 
	\end{lstlisting}
	
	根据结果显示,样本相关系数为0.9489428,相关系数检验的$t$统计量的观测值为$8.5086$,对应的概率$P$值为$2.795e-05$,在显著性水平为$1\%$的条件下,拒绝相关系数为0的原假设,认为$x$与$y$之间存在显著的线性相关关系.
	
	(10) 对回归方程作残差图并做出相应分析
	
	\begin{lstlisting}[language=R]
	> df <- data.frame(x=x,residuals=model$residuals)
	> p<-ggplot(df,aes(x=x,y=residuals))+geom_point(colour='#990000')+
	> xlab("residuals")+
	> ylab("df")
	> ggsave("resplot_2_15.png",p,width = 5, height = 3)
	\end{lstlisting}

	\begin{figure}[htbp]
	
	\centering
	\includegraphics{Graph//resplot_2_15.png}
	\caption{残差与$x$散点图}
	\label{resplot_2_15}
	\end{figure}
	根据图\ref{resplot_2_15}残差在$e=0$附近随机变化,可认为回归模型满足基本假定.
	
	(11) 该公司预计下一周签发新保单1000张,需要的加班时间是多少?
	
	\begin{lstlisting}[language=R]
	> predict(model,newdata = data.frame(x=1000))
	3.703262
	\end{lstlisting}
	预计需要的加班时间为:3.703262
	
	(12) 给出$y_{0}$的置信度为95\%的精确预测区间与近似置信区间
	
	\begin{lstlisting}[language=R]
	> predict(model,newdata = data.frame(x=1000),interval = "prediction")
	     fit     lwr      upr
	1 3.703262 2.51949 4.887033
	\end{lstlisting}
	根据近似区间计算公式
	\begin{equation*}
		[\hat{y}_{0}-2\hat{\sigma}, \hat{y}_{0}+2\hat{\sigma}]
	\end{equation*}
	可得精确预测区间为:$[2.51949,4.887033]$,近似预测区间为：$[2.743262,4.663262]$
	
	(13) 给出$E(y_{0})$置信度为95\%的估计区间.
	
	\begin{lstlisting}[language=R]
	> predict(model,newdata = data.frame(x=1000),interval = "confidence")
	       fit      lwr      upr
	1 3.703262 3.283728 4.122795
	\end{lstlisting}
	$E(y_{0})$置信度为95\%的估计区间为:$[3.283728,4.122795]$.